## Run txi import
#library(tximport)
library(stringr)
#library(colorspace)
library(DESeq2)
library(ggfortify)
#library(sva)
#library(pcaExplorer)
library(extrafont)
library("AnnotationDbi")
library("org.Hs.eg.db")
library(topGO)
library(DT)
library(dplyr)
library(readr)
library(gplots)
library(RColorBrewer)
library(viridis)
library(ggpubr)
library(reshape)
library(ggrepel)
library(data.table)
library(knitr)
library(apeglm)
library(gridExtra)
library("pheatmap")
library(grid) ## Need to attach (and not just load) grid package
library(geneplotter)
library(tibble)
library(threejs)
library(EnsDb.Hsapiens.v86)
library(rhdf5)
library(kableExtra)
library(openxlsx)
library(clusterProfiler)
library(magrittr)
library("pathview")
library(DOSE)
#library(EnsDb.Hsapiens.v75)
library(scales)
library(msigdbr)
library(fgsea)
library(tidyr)
library(cowplot)
#library(ComplexHeatmap)
#library(UpSetR)
#library(circlize)
library(enrichplot)
#setwd("F:/research/Sanyi")
tableGrob2 <- function(d, p = NULL) {
# has_package("gridExtra")
d <- d[order(rownames(d)),]
tp <- gridExtra::tableGrob(d)
if (is.null(p)) {
return(tp)
}
# Fix bug: The 'group' order of lines and dots/path is different
p_data <- ggplot_build(p)$data[[1]]
# pcol <- unique(ggplot_build(p)$data[[1]][["colour"]])
p_data <- p_data[order(p_data[["group"]]), ]
pcol <- unique(p_data[["colour"]])
## This is fine too
## pcol <- unique(p_data[["colour"]])[unique(p_data[["group"]])]
j <- which(tp$layout$name == "rowhead-fg")
for (i in seq_along(pcol)) {
tp$grobs[j][[i+1]][["gp"]] <- gpar(col = pcol[i])
}
return(tp)
}
gseaplot2_v2 = function (x, geneSetID, title = "", color = "green", base_size = 11,
rel_heights = c(1.5, 0.5, 1), subplots = 1:3, pvalue_table = FALSE,
ES_geom = "line", ylim_topplot=NULL, addhline_topplot=FALSE)
{
ES_geom <- match.arg(ES_geom, c("line", "dot"))
geneList <- position <- NULL
if (length(geneSetID) == 1) {
gsdata <- enrichplot:::gsInfo(x, geneSetID)
} else {
gsdata <- do.call(rbind, lapply(geneSetID, enrichplot:::gsInfo, object = x))
}
p <- ggplot(gsdata, aes_(x = ~x)) + xlab(NULL) + theme_classic(base_size) +
theme(panel.grid.major = element_line(colour = "grey92"),
panel.grid.minor = element_line(colour = "grey92"),
panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank()) +
scale_x_continuous(expand = c(0, 0))
if (ES_geom == "line") {
es_layer <- geom_line(aes_(y = ~runningScore, color = ~Description),
size = 1)
}
else {
es_layer <- geom_point(aes_(y = ~runningScore, color = ~Description),
size = 1, data = subset(gsdata, position == 1))
}
p.res <- p + es_layer + theme(legend.position = c(0.8, 0.8),
legend.title = element_blank(), legend.background = element_rect(fill = "transparent"))
p.res <- p.res + ylab("Running Enrichment Score") + theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(), axis.line.x = element_blank(),
plot.margin = margin(t = 0.2, r = 0.2, b = 0, l = 0.2,
unit = "cm"))
if(!is.null(addhline_topplot)){
p.res <- p.res + geom_hline(yintercept=0, colour="grey40")
}
if(!is.null(ylim_topplot)){
p.res <- p.res + coord_cartesian(ylim = ylim_topplot)
}
i <- 0
for (term in unique(gsdata$Description)) {
idx <- which(gsdata$ymin != 0 & gsdata$Description ==
term)
gsdata[idx, "ymin"] <- i
gsdata[idx, "ymax"] <- i + 1
i <- i + 1
}
p2 <- ggplot(gsdata, aes_(x = ~x)) + geom_linerange(aes_(ymin = ~ymin,
ymax = ~ymax, color = ~Description)) + xlab(NULL) +
ylab(NULL) + theme_classic(base_size) + theme(legend.position = "none",
plot.margin = margin(t = -0.1, b = 0, unit = "cm"),
axis.ticks = element_blank(), axis.text = element_blank(),
axis.line.x = element_blank()) + scale_x_continuous(expand = c(0,
0)) + scale_y_continuous(expand = c(0, 0))
if (length(geneSetID) == 1) {
v <- seq(1, sum(gsdata$position), length.out = 9)
inv <- findInterval(rev(cumsum(gsdata$position)), v)
if (min(inv) == 0)
inv <- inv + 1
col = c(rev(brewer.pal(5, "Blues")), brewer.pal(5, "Reds"))
ymin <- min(p2$data$ymin)
yy <- max(p2$data$ymax - p2$data$ymin) * 0.3
xmin <- which(!duplicated(inv))
xmax <- xmin + as.numeric(table(inv)[unique(inv)])
d <- data.frame(ymin = ymin, ymax = yy, xmin = xmin,
xmax = xmax, col = col[unique(inv)])
p2 <- p2 + geom_rect(aes_(xmin = ~xmin, xmax = ~xmax,
ymin = ~ymin, ymax = ~ymax, fill = ~I(col)), data = d,
alpha = 0.9, inherit.aes = FALSE)
}
df2 <- p$data
df2$y <- p$data$geneList[df2$x]
p.pos <- p + geom_segment(data = df2, aes_(x = ~x, xend = ~x,
y = ~y, yend = 0), color = "grey")
p.pos <- p.pos + ylab("Ranked list metric") + xlab("Rank in Ordered Dataset") +
theme(plot.margin = margin(t = -0.1, r = 0.2, b = 0.2,
l = 0.2, unit = "cm"))
if (!is.null(title) && !is.na(title) && title != "")
p.res <- p.res + ggtitle(title)
if (length(color) == length(geneSetID)) {
p.res <- p.res + scale_color_manual(values = color)
if (length(color) == 1) {
p.res <- p.res + theme(legend.position = "none")
p2 <- p2 + scale_color_manual(values = "black")
} else {
p2 <- p2 + scale_color_manual(values = color)
}
}
if (pvalue_table) {
# pd <- x[geneSetID, c("Description", "pvalue", "p.adjust")]
# pd <- pd[order(pd[, 1], decreasing = FALSE), ]
# rownames(pd) <- pd$Description
# pd <- pd[, -1]
pd <- x[geneSetID, c("enrichmentScore", "NES", "pvalue", "p.adjust")]
rownames(pd) <- ""
# pd <- pd[, ]
pd[,1:2] <- round(pd[,1:2],2)
pd[,3:4] <- round(pd[,3:4],3)
colnames(pd)[1] <- "ES"
pd <- round(pd, 4)
tp <- tableGrob2(pd, p.res)
p.res <- p.res + theme(legend.position = "none") + annotation_custom(tp,
xmin = quantile(p.res$data$x, 0.5), xmax = quantile(p.res$data$x,
0.95), ymin = quantile(p.res$data$runningScore,
0.75), ymax = quantile(p.res$data$runningScore,
0.9))
}
plotlist <- list(p.res, p2, p.pos)[subplots]
n <- length(plotlist)
plotlist[[n]] <- plotlist[[n]] + theme(axis.line.x = element_line(),
axis.ticks.x = element_line(), axis.text.x = element_text())
if (length(subplots) == 1)
return(plotlist[[1]] + theme(plot.margin = margin(t = 0.2,
r = 0.2, b = 0.2, l = 0.2, unit = "cm")))
if (length(rel_heights) > length(subplots))
rel_heights <- rel_heights[subplots]
plot_grid(plotlist = plotlist, ncol = 1, align = "v", rel_heights = rel_heights)
}
cancers <- c("ACC","BLCA","BRCA","CESC","CHOL", "COAD","DLBC", "ESCA", "GBM", "HNSC", "KICH","KIRC","KIRP", "LGG", "LIHC", "LUAD", "LUSC", "MESO","OV", "PAAD", "PCPG", "PRAD", "READ","SARC", "SKCM","STAD","TGCT", "THCA", "THYM", "UCEC","UCS")
mycancer = params$selected_cancers_dge
load(paste0("./data/TCGA_",gsub("-","_", mycancer), "_RNASeq_STAR_primary.RData"))
rnaseq<-assay(data, "unstranded")
rnaseq <- rnaseq[,order(colnames(rnaseq))]
colnames(rnaseq)<-substr(colnames(rnaseq),1,12)
rnaseq<-rnaseq[,!duplicated(colnames(rnaseq))]
rnaseq <- as.matrix(rnaseq)
trnaseq <- as.data.frame(t(rnaseq))
trnaseq$PatientID <- rownames(trnaseq)
colnames(trnaseq) = gsub("\\..*","",colnames(trnaseq))
merged <- allclinical[allclinical$disease==mycancer,]
merged = merge(merged, trnaseq,by.x="submitter_id",by.y="PatientID")
ensembl_id <- mapIds(org.Hs.eg.db, keys = params$selected_gene,
column = "ENSEMBL", keytype = "SYMBOL", multiVals = "first")
colnames(merged)[colnames(merged) == ensembl_id] <- params$selected_gene
ensembl2genes <- as.data.frame(genes(EnsDb.Hsapiens.v86,return.type="DataFrame"))
ensembl2genes$entrezid_v2=mapIds(org.Hs.eg.db,
keys=ensembl2genes$gene_id,
column="ENTREZID",
keytype="ENSEMBL",
multiVals="first")
ensembl2genes_unique = ensembl2genes[(!is.na(ensembl2genes$entrezid_v2) & (ensembl2genes$seq_name %in% c(1:22,"X","Y","MT"))),]
ensembl2genes_unique = ensembl2genes_unique[ensembl2genes_unique$gene_id %in% gsub("\\..*","",colnames(trnaseq)),]
ensembl2genes_unique$entrezid_v2 = ifelse(ensembl2genes_unique$gene_id %in% c("ENSG00000177257","ENSG00000186599","ENSG00000198129","ENSG00000226023","ENSG00000283154","ENSG00000236125","ENSG00000111215","ENSG00000107014","ENSG00000278662"), ensembl2genes_unique$entrezid, ensembl2genes_unique$entrezid_v2)
ensembl2genes_unique = ensembl2genes_unique[!(ensembl2genes_unique$gene_id %in% c("ENSG00000261832", "ENSG00000263715", "ENSG00000258653", "ENSG00000283563", "ENSG00000273167", "ENSG00000268861", "ENSG00000267645", "ENSG00000261130", "ENSG00000256966", "ENSG00000185040", "ENSG00000269226", "ENSG00000237541", "ENSG00000257046", "ENSG00000249624", "ENSG00000174353", "ENSG00000255330", "ENSG00000178934", "ENSG00000283201", "ENSG00000228696", "ENSG00000275740", "ENSG00000258724", "ENSG00000183336", "ENSG00000177243", "ENSG00000269845", "ENSG00000255508", "ENSG00000205571", "ENSG00000248472", "ENSG00000233614", "ENSG00000215190", "ENSG00000274512", "ENSG00000205572", "ENSG00000260548", "ENSG00000272916", "ENSG00000260128", "ENSG00000258790", "ENSG00000280987")), ]
ensembl2genes_unique = ensembl2genes_unique[ensembl2genes_unique$gene_biotype %in% c("IG_C_gene", "IG_D_gene", "IG_J_gene", "IG_V_gene", "protein_coding", "ribozyme", "TR_C_gene", "TR_D_gene", "TR_J_gene", "TR_V_gene"),]
msigdbr_df01 <- msigdbr(species = "Homo sapiens", category = "H")
msigdbr_df02 <- msigdbr(species = "Homo sapiens", category = "C2")
msigdbr_df = rbind(msigdbr_df01, msigdbr_df02)
msigdbr_df = msigdbr_df[grepl("^HALLMARK|^KEGG|^BIOCARTA|^REACTOME|^WP", msigdbr_df$gs_name),]
msigdbr_df_2columns=msigdbr_df[,c("gs_name", "entrez_gene")]
pathwaysHandC2 = split(x = msigdbr_df$entrez_gene, f = msigdbr_df$gs_name)
msigdbr_df01 <- msigdbr(species = "Homo sapiens", category = "H")
msigdbr_df02 <- msigdbr(species = "Homo sapiens", category = "C2")
msigdbr_df = rbind(msigdbr_df01, msigdbr_df02)
msigdbr_df_other = msigdbr_df[!grepl("^HALLMARK|^KEGG|^BIOCARTA|^REACTOME|^WP", msigdbr_df$gs_name),]
msigdbr_df_2columns_other=msigdbr_df_other[,c("gs_name", "entrez_gene")]
msigdbr_df_GO <- msigdbr(species = "Homo sapiens", category = "C5")
msigdbr_df_2columns_GO=msigdbr_df_GO[,c("gs_name", "entrez_gene")]
limit1=params$selected_percentage/100
limit2=(100-params$selected_percentage)/100
subset =merged
subset$status = factor(ifelse(subset[,params$selected_gene]<quantile(subset[,params$selected_gene],c(0,limit1, limit2,1))[2],"low" ,ifelse(subset[,params$selected_gene]>quantile(subset[,params$selected_gene],c(0,limit1, limit2,1))[3],"high", NA)), levels=c("low", "high"))
subset = subset[!is.na(subset$status),]
dds <- DESeqDataSetFromMatrix(countData = as.matrix(t(subset[,gsub("\\..*","",colnames(subset)) %in% ensembl2genes_unique$gene_id])),
colData = subset[,c(1:58, ncol(subset))],
design= ~ status)
dds_de <- DESeq(dds)
#res_HighvsLow <- results(dds_de, contrast=c("status","high", "low"), alpha=0.05)
res_HighvsLow <- lfcShrink(dds=dds_de, coef="status_high_vs_low", type="apeglm")
res_HighvsLow_df = as.data.frame(res_HighvsLow)
res_HighvsLow_df = merge(res_HighvsLow_df,ensembl2genes_unique, by.x="row.names", by.y="gene_id")
res_HighvsLow_df = as.data.frame(res_HighvsLow_df[order(res_HighvsLow_df$padj),])
This figure is only available in the downloaded report. To view it:
1. Download the report (HTML file)
2. Open the file on your computer
Once you have done this, all figures and tables will be visible.
If you hover over the data points on this plot with your mouse, more details will be revealed, including the gene name.
#```{r,warning=F, message=F,fig.show = 'hold', out.width = c('50%','50%'), eval = TRUE}
library(plotly)
# Assuming your data frame is called 'res_HighvsLow_df '
# If not, replace 'res_HighvsLow_df ' with your actual data frame name
# Create the ggplot object for the volcano plot
volcano_plot <- ggplot(res_HighvsLow_df , aes(x = log2FoldChange, y = -log10(padj))) +
geom_point(aes(color = ifelse(padj < 0.05 & abs(log2FoldChange) > 1, "Significant", "Not Significant"),
text = paste("Gene:", gene_name,
"<br>log2FoldChange:", round(log2FoldChange, 2),
"<br>padj:", signif(padj, 3),
"<br>Gene Biotype:", gene_biotype)),
alpha = 0.6) +
scale_color_manual(values = c("Significant" = "red", "Not Significant" = "gray")) +
theme_minimal() +
labs(title = "Interactive Volcano Plot",
x = "log2 Fold Change",
y = "-log10 Adjusted p-value") +
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "blue") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "blue") +
theme(legend.position = "none") # Remove the legend
# Convert ggplot to an interactive plotly object
interactive_volcano <- ggplotly(volcano_plot, tooltip = "text")
# Customize the layout
interactive_volcano <- interactive_volcano %>%
layout(hoverlabel = list(bgcolor = "white"),
annotations = list(
list(x = 1.02, y = 1.05,
text = "Click and drag to zoom<br>Double-click to reset",
xref = "paper", yref = "paper",
showarrow = FALSE)
))
# Display the interactive plot
interactive_volcano
volcano_plot <- ggplot(res_HighvsLow_df, aes(x = log2FoldChange, y = -log10(padj))) +
geom_point(aes(color = ifelse(padj < 0.05 & abs(log2FoldChange) > 1, "Significant", "Not Significant"),
size = baseMean), alpha = 0.6) +
scale_color_manual(values = c("Significant" = "red", "Not Significant" = "gray")) +
scale_size_continuous(range = c(1, 5)) +
theme_minimal() +
labs(title = "Enhanced Volcano Plot",
x = "log2 Fold Change",
y = "-log10 Adjusted p-value",
color = "Significance",
size = "Base Mean") +
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "blue") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "blue") +
geom_text_repel(data = subset(res_HighvsLow_df, padj < 0.01 & abs(log2FoldChange) > 2),
aes(label = gene_name), max.overlaps = 20)
print(volcano_plot)
datatable(
res_HighvsLow_df[res_HighvsLow_df$padj < 0.05,
c("symbol", "baseMean", "log2FoldChange", "lfcSE", "pvalue", "padj")],
caption = '',
filter = 'top',
options = list(
pageLength = 10,
autoWidth = TRUE,
dom = 'Bfrtip',
buttons = c('csv', 'excel')
),
extensions = 'Buttons',
class = "compact",
rownames = FALSE
)
This table is only available in the downloaded report. To view it:geneList = res_HighvsLow_df$log2FoldChange
names(geneList) = res_HighvsLow_df$entrezid_v2
geneList<-na.omit(geneList)
geneList <- sort(geneList, decreasing = TRUE)
#length(geneList)
geneList=geneList[!duplicated(names(geneList))]
#length(geneList)
GSEAres <- GSEA(geneList, TERM2GENE = msigdbr_df_2columns, minGSSize = 10, pvalueCutoff = 1)
mypathways_table=GSEAres[order(GSEAres$p.adjust),]
mypathways=mypathways_table[mypathways_table$p.adjust<0.05,"ID"]
datatable(mypathways_table[mypathways_table$p.adjust<0.05,c("ID", "enrichmentScore", "NES", "pvalue", "p.adjust")], caption = paste0('Pathways, p.adjust<0.05'), filter = 'top', options = list(
pageLength = 10,
autoWidth = TRUE,
dom = 'Bfrtip',
buttons = c('csv', 'excel')
),
extensions = 'Buttons',
class = "compact",
rownames = FALSE
)
GSEAres_core = GSEAres
This table is only available in the downloaded report. To view it:
1. Download the report (HTML file)
2. Open the file on your computer
Once you have done this, all figures and tables will be visible.
if(length(mypathways)>0){
nocomment = sapply(mypathways[1:min(length(mypathways),10)], function(x) { p1 = gseaplot2_v2(GSEAres, geneSetID = x, title = paste0(str_to_title(x), ""), ylim_topplot=NULL, pvalue_table = T, addhline_topplot=T);print(p1)})
}
GSEAres <- GSEA(geneList, TERM2GENE = msigdbr_df_2columns_GO, minGSSize = 10, pvalueCutoff = 1)
mypathways_table=GSEAres[order(GSEAres$p.adjust),]
mypathways=mypathways_table[mypathways_table$p.adjust<0.05,"ID"]
datatable(mypathways_table[mypathways_table$p.adjust<0.05,c("ID", "enrichmentScore", "NES", "pvalue", "p.adjust")], caption = paste0('Pathways, p.adjust<0.05'), filter = 'top', options = list(
pageLength = 10,
autoWidth = TRUE,
dom = 'Bfrtip',
buttons = c('csv', 'excel')
),
extensions = 'Buttons',
class = "compact",
rownames = FALSE
)
GSEAres_go= GSEAres